rm(list = ls())

here::i_am(file.path("code", "02_iv_census_outcomes.R"))
library(here)
source(here("code", "config.R"))

est_df <- read_parquet(here("data", "analysis", "analysis.parquet")) 

iv_alt_z <- feols(
  c(occscore_c1930_z, occscore_c1930) ~ 1 |
    board_identifier + birthyr_cards + bpl_cards + exemption^married_cards +
    farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(
      occscore_c1930 = ifelse(empstat_c1930 == 20 | empstat_c1930 == 30, 0, occscore_c1930),
      employed_c1930 = empstat_c1930 == 10,
      unemployed_c1930 = empstat_c1930 == 20,
      selfemployed_c1930 = (classwkr_c1930 >= 10) & (classwkr_c1930 <= 20),
      literate_c1930 = lit_c1930 == 4,
      own_home_c1930 = ownershp_c1930 == 10,
      work_club_c1930 = ifelse(
        occ1950_c1930 == 999,
        NA,
        occ1950_c1930 == 260
      ),
      live_club_c1930 = ifelse(
        gqtype_c1930 == 999,
        NA,
        gqtype_c1930 %in% c(803, 804)
      ),
      ind_club_c1930 = ind1950_c1930 == 897,
      educ_c1940_gt6 = ifelse(
        educ_c1940 == 999,
        NA,
        educ_c1940 >= 24
      ),
      educ_c1940_college = ifelse(
        educ_c1940 == 999,
        NA,
        educ_c1940 >= 70
      ),
      incwage_c1940 = ifelse(empstat_c1940 >= 20, 0, incwage_c1940),
      across(c(is_naacp, married_c1930, occscore_c1930, incwage_pred_c1930,
               unemployed_c1930, selfemployed_c1930, presgl_c1930, literate_c1930,
               own_home_c1930, work_club_c1930, ind_club_c1930,
               live_club_c1930, incwage_c1940, educ_c1940_gt6, educ_c1940_college),
             list(z = ~ scale(.)[,1]))
    ),
  cluster = ~ serial_num
)

iv_alt_z <- feols(
  c(occscore_c1930_z, incwage_pred_c1930_z, unemployed_c1930_z, selfemployed_c1930_z,
    literate_c1930_z, own_home_c1930_z, incwage_c1940_z, educ_c1940_college_z,
    work_club_c1930_z, ind_club_c1930_z, live_club_c1930_z) ~ 1 |
    board_identifier + birthyr_cards + bpl_cards + exemption^married_cards +
    farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(
      occscore_c1930 = ifelse(empstat_c1930 == 20 | empstat_c1930 == 30, 0, occscore_c1930),
      employed_c1930 = empstat_c1930 == 10,
      unemployed_c1930 = empstat_c1930 == 20,
      selfemployed_c1930 = (classwkr_c1930 >= 10) & (classwkr_c1930 <= 20),
      literate_c1930 = lit_c1930 == 4,
      own_home_c1930 = ownershp_c1930 == 10,
      work_club_c1930 = ifelse(
        occ1950_c1930 == 999,
        NA,
        occ1950_c1930 == 260
      ),
      live_club_c1930 = ifelse(
        gqtype_c1930 == 999,
        NA,
        gqtype_c1930 %in% c(803, 804)
      ),
      ind_club_c1930 = ind1950_c1930 == 897,
      educ_c1940_gt6 = ifelse(
        educ_c1940 == 999,
        NA,
        educ_c1940 >= 24
      ),
      educ_c1940_college = ifelse(
        educ_c1940 == 999,
        NA,
        educ_c1940 >= 70
      ),
      incwage_c1940 = ifelse(empstat_c1940 >= 20, 0, incwage_c1940),
      across(c(is_naacp, married_c1930, occscore_c1930, incwage_pred_c1930,
               unemployed_c1930, selfemployed_c1930, presgl_c1930, literate_c1930,
               own_home_c1930, work_club_c1930, ind_club_c1930,
               live_club_c1930, incwage_c1940, educ_c1940_gt6, educ_c1940_college),
             list(z = ~ scale(.)[,1]))
    ),
  cluster = ~ serial_num
)

p <- map_dfr(iv_alt_z, broom::tidy, conf.int = TRUE, .id = "spec") |>
  mutate(
    outcome = substr(spec, 6, nchar(spec)),
    outcome = factor(
      outcome,
      levels = c(
        "literate_c1930_z",
        "own_home_c1930_z",
        "unemployed_c1930_z",
        "selfemployed_c1930_z",
        "occscore_c1930_z",
        "incwage_pred_c1930_z",
        "incwage_c1940_z",
        "educ_c1940_gt6_z",
        "educ_c1940_college_z",
        "work_club_c1930_z",
        "ind_club_c1930_z",
        "live_club_c1930_z"
      ), labels = c(
        "Literate",
        "Owns home",
        "Unemployed",
        "Self-employed",
        "Occup. income",
        "Predicted income",
        "Income (1940)",
        "Education (1940)",
        "Any college (1940)",
        "Occupation",
        "Industry",
        "Residence"
      )
    ),
    cat = ifelse(
      outcome %in% c("Literate", "Owns home", "Unemployed", "Self-employed", "Occup. income",
                     "Predicted income", "Income (1940)", "Education (1940)",
                     "Any college (1940)"),
      "(a) Socioeconomic outcomes", 
      "(b) Club involvement"
    )
  ) |>
  ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = outcome)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  labs(x = "TSLS coefficient on veteran", y = "") +
  xlim(-.5, .5) +
  scale_y_discrete(limits = rev) +
  ggforce::facet_col(facets = vars(cat), scales = "free_y", space = "free")

p_color <- p +
  geom_point(size = 2, color = "#F8766D") +
  geom_linerange(color = "#F8766D")
ggsave(file.path(fig_dir, "color", "iv_alt_outcomes.pdf"), plot = p_color, width = 5, height = 4.2)

p_bw <- p +
  geom_point(size = 2) +
  geom_linerange()
ggsave(file.path(fig_dir, "bw", "iv_alt_outcomes.pdf"), plot = p_bw, width = 5, height = 4.2)
